# This is an initial script for handling raw data to transform it for analysis.
# Script for merging raw data for analysis
# Merges in and then drops Free Votes
# Drops party switchers
# Drops free votes from speech files
# Produces Summary Statistics Appearing in Table 1 of Paper

# Start from an empty global environment: removes every object currently
# visible to ls() before the script builds its own.
rm(list=ls())
# plyr supplies join(), used for the keyed joins below.
library(plyr)


##### SET YOUR PATHS HERE #####
##########################################

# path1: folder with the raw input data (becomes the working directory below).
# path2: folder that receives the .rda files saved at the end of the script.
# NOTE(review): the two paths spell the replication folder differently
# ("APSR_SKLLO_Repfiles" vs "APSR_SKLLO_RepFiles"); on a case-sensitive file
# system only one spelling can exist -- confirm which one is correct.
path1 <- "~/Dropbox (Personal)/Rebel Summaries/APSR_SKLLO_Repfiles/RawData/"
path2 <- "~/Dropbox (Personal)/Rebel Summaries/APSR_SKLLO_RepFiles/CalcData"


# Set working directory to the folder holding the raw data, so that the
# relative read.csv() paths used below resolve against path1.
setwd(path1)

#### Load the party-affiliation / leadership file, which holds the full set of MPs and positions
leader <- read.csv("UK HoC Leadership Data 1979-2016.csv")

## Restrict to the parliaments under study.
## Each slice is the run of columns from the one whose name matches `first`
## through the one whose name matches `last`, inclusive.
slice_period <- function(first, last) {
  header <- colnames(leader)
  from <- grep(first, header, perl = TRUE)
  to <- grep(last, header, perl = TRUE)
  leader[, from:to]
}

leader9297 <- slice_period("X1992_4", "X1997_4")
leader9701 <- slice_period("X1997_5", "X2001_5")

leader0510 <- slice_period("X2005_5", "X2010_4")
leader1015 <- slice_period("X2010_5", "X2015_4")

### Remove MPs who were not in parliament or who sat for only part of a term.
### A 0 in these data means the MP was not in parliament at the time.
### Keep the PubWhipIDs only of MPs serving the full term of each parliament.
### Party switchers within a term are dropped by keeping only rows whose
### Party.Affiliation is coded 1 or 2 (Lab and Con throughout).

## TRUE for rows that are present in every period of the term and whose
## Party.Affiliation code is 1 or 2. `present` is the logical "value > 0"
## matrix for one parliament, with one row per row of `leader`.
full_term_main_party <- function(present) {
  party <- leader$Party.Affiliation
  rowMeans(present) == 1 & (party == 1 | party == 2)
}

leader9297.mp <- leader9297 > 0
leader9297    <- leader9297[full_term_main_party(leader9297.mp), ]
PubWhipID.92  <- leader$PubWhipID[full_term_main_party(leader9297.mp)]

leader9701.mp <- leader9701 > 0
leader9701    <- leader9701[full_term_main_party(leader9701.mp), ]
PubWhipID.97  <- leader$PubWhipID[full_term_main_party(leader9701.mp)]

leader0510.mp <- leader0510 > 0
leader0510    <- leader0510[full_term_main_party(leader0510.mp), ]
PubWhipID.05  <- leader$PubWhipID[full_term_main_party(leader0510.mp)]

leader1015.mp <- leader1015 > 0
leader1015    <- leader1015[full_term_main_party(leader1015.mp), ]
PubWhipID.10  <- leader$PubWhipID[full_term_main_party(leader1015.mp)]

## MPs excluded from each parliament

# An MP is listed as dropped when present for some but not all periods of the
# term, or present in at least one period while Party.Affiliation is coded 0.
# `present` is the logical "value > 0" matrix for one parliament.
dropped_ids <- function(present) {
  share <- rowMeans(present)
  leader$PubWhipID[share > 0 & (share < 1 | leader$Party.Affiliation == 0)]
}

dropped92 <- dropped_ids(leader9297.mp)
dropped97 <- dropped_ids(leader9701.mp)
dropped05 <- dropped_ids(leader0510.mp)
dropped10 <- dropped_ids(leader1015.mp)


# Columns 4 to 6 of the leader file for the dropped MPs of each parliament
drop92 <- leader[leader$PubWhipID %in% dropped92, 4:6]
drop97 <- leader[leader$PubWhipID %in% dropped97, 4:6]
drop05 <- leader[leader$PubWhipID %in% dropped05, 4:6]
drop10 <- leader[leader$PubWhipID %in% dropped10, 4:6]


### Flag MPs who were leaders for the whole of a parliament.
### Leaders are position codes 6 and above (Min of State/AG and opposition
### equivalents and above).

## 0/1 vector, one entry per ID: 1 when every period of the term is a
## leadership position, 0 otherwise.
whole_term_flag <- function(is.leader, ids) {
  flag <- rep(0, length(ids))
  flag[rowMeans(is.leader) == 1] <- 1
  flag
}

## Two-column table (PubWhipID, leader.all) without the rows whose
## PubWhipID is missing.
leader_table <- function(ids, flag) {
  tab <- as.data.frame(cbind(ids, flag))
  colnames(tab) <- c("PubWhipID", "leader.all")
  tab[!is.na(tab$PubWhipID), ]
}

leader9297.bin.ld <- leader9297 >= 6
leader9297.ld     <- whole_term_flag(leader9297.bin.ld, PubWhipID.92)
bin.leader9297    <- leader_table(PubWhipID.92, leader9297.ld)

leader9701.bin.ld <- leader9701 >= 6
leader9701.ld     <- whole_term_flag(leader9701.bin.ld, PubWhipID.97)
bin.leader9701    <- leader_table(PubWhipID.97, leader9701.ld)

leader0510.bin.ld <- leader0510 >= 6
leader0510.ld     <- whole_term_flag(leader0510.bin.ld, PubWhipID.05)
bin.leader0510    <- leader_table(PubWhipID.05, leader0510.ld)

leader1015.bin.ld <- leader1015 >= 6
leader1015.ld     <- whole_term_flag(leader1015.bin.ld, PubWhipID.10)
bin.leader1015    <- leader_table(PubWhipID.10, leader1015.ld)

### Rebel data
### Early period 1992--2001

## Full (outer) join of two tables on the MP identifier
join_on_id <- function(left, right) {
  join(left, right, by = "PubWhipID", type = "full")
}

## One rebels.csv per session, 1992-97
dta.92 <- read.csv("1992-1993/rebels.csv")
dta.93 <- read.csv("1993-1994/rebels.csv")
dta.94 <- read.csv("1994-1995/rebels.csv")
dta.95 <- read.csv("1995-1996/rebels.csv")
dta.96 <- read.csv("1996-1997/rebels.csv")

## Chain the sessions into one 1992-97 table
dta.temp <- join_on_id(dta.92, dta.93)
dta.temp <- join_on_id(dta.temp, dta.94)
dta.temp <- join_on_id(dta.temp, dta.95)
dta.9297 <- join_on_id(dta.temp, dta.96)

## One rebels.csv per session, 1997-2001
dta.97 <- read.csv("1997-1998/rebels.csv")
dta.98 <- read.csv("1998-1999/rebels.csv")
dta.99 <- read.csv("1999-2000/rebels.csv")
dta.00 <- read.csv("2000-2001/rebels.csv")

## Chain the sessions into one 1997-2001 table
dta.temp <- join_on_id(dta.97, dta.98)
dta.temp <- join_on_id(dta.temp, dta.99)
dta.9701 <- join_on_id(dta.temp, dta.00)

## Column count minus five, taken before free votes are removed
## (presumably the first five columns are MP identifiers -- verify against
## the rebels.csv layout)
totdiv.9297 <- ncol(dta.9297) - 5
totdiv.9701 <- ncol(dta.9701) - 5


# Division-level files (carrying the FreeVote marker) for every session 1992-2001
fv.92 <- read.csv("1992-1993/divisions.csv")
fv.93 <- read.csv("1993-1994/divisions.csv")
fv.94 <- read.csv("1994-1995/divisions.csv")
fv.95 <- read.csv("1995-1996/divisions.csv")
fv.96 <- read.csv("1996-1997/divisions.csv")
fv.97 <- read.csv("1997-1998/divisions.csv")
fv.98 <- read.csv("1998-1999/divisions.csv")
fv.99 <- read.csv("1999-2000/divisions.csv")
fv.00 <- read.csv("2000-2001/divisions.csv")

# Stack the sessions of each parliament
fv.9297 <- rbind(fv.92, fv.93, fv.94, fv.95, fv.96)
fv.9701 <- rbind(fv.97, fv.98, fv.99, fv.00)

# Rewrite division identifiers so they can be compared with the column names
# of the rebel tables: prefix "X" and replace every "-" with "."
as_division_column <- function(division) {
  gsub("-", ".", paste("X", as.character(division), sep = ""))
}

# Column names of the divisions marked FreeVote == 1
fdiv.9297 <- as_division_column(fv.9297$Division[fv.9297$FreeVote == 1])
fdiv.9701 <- as_division_column(fv.9701$Division[fv.9701$FreeVote == 1])

# Drop free votes (if desired... comment out to keep them)
dta.9297 <- dta.9297[, !(colnames(dta.9297) %in% fdiv.9297)]
dta.9701 <- dta.9701[, !(colnames(dta.9701) %in% fdiv.9701)]


## Later period: one rebels.csv per parliament
dta.0510 <- read.csv("2005-2010/rebels.csv")
dta.1015 <- read.csv("2010-2015/rebels.csv")

## Column count minus five, taken before free votes are removed
totdiv.0510 <- ncol(dta.0510) - 5
totdiv.1015 <- ncol(dta.1015) - 5

## Later-period division files
fv.0510 <- read.csv("2005-2010/divisions.csv")
fv.1015 <- read.csv("2010-2015/divisions.csv")

## Rewrite division identifiers so they can be compared with the column names
## of the rebel tables: prefix "X" and replace every "-" with "."
as_division_column <- function(division) {
  gsub("-", ".", paste("X", as.character(division), sep = ""))
}

## 2005-10 treats a division as free when any of FreeVote, FreeVLab or
## FreeVCon equals 1; 2010-15 uses FreeVote alone.
free.0510 <- fv.0510$FreeVote == 1 | fv.0510$FreeVLab == 1 | fv.0510$FreeVCon == 1
fdiv.0510 <- as_division_column(fv.0510$Division[free.0510])
rm(free.0510)
fdiv.1015 <- as_division_column(fv.1015$Division[fv.1015$FreeVote == 1])

# Drop free votes
dta.0510 <- dta.0510[, !(colnames(dta.0510) %in% fdiv.0510)]
dta.1015 <- dta.1015[, !(colnames(dta.1015) %in% fdiv.1015)]

# JBS: 8 November 2016

#### Attach the leader flag to the rebel vote data.
#### The inner join keeps only MPs present in the leader tables, which is
#### what removes other parties and party switchers.
#### leader.all is then moved from the last column to position 6.

## The original index `c(1:5, ncol(x), 7:ncol(x)-1)` relies on `:` binding
## tighter than `-`: `7:ncol(x)-1` is `(7:ncol(x)) - 1`, i.e. 6:(ncol(x)-1).
## Written out explicitly here; every column is kept, none is dropped.
leader_to_sixth <- function(tab) {
  last <- ncol(tab)
  tab[, c(1:5, last, 6:(last - 1))]
}

dta.9297 <- leader_to_sixth(join(dta.9297, bin.leader9297, by = "PubWhipID", type = "inner"))
dta.9701 <- leader_to_sixth(join(dta.9701, bin.leader9701, by = "PubWhipID", type = "inner"))

dta.0510 <- leader_to_sixth(join(dta.0510, bin.leader0510, by = "PubWhipID", type = "inner"))
dta.1015 <- leader_to_sixth(join(dta.1015, bin.leader1015, by = "PubWhipID", type = "inner"))

### Number of non-free divisions: every column after the first six

totdiv.nf.9297 <- ncol(dta.9297) - 6
totdiv.nf.9701 <- ncol(dta.9701) - 6

totdiv.nf.0510 <- ncol(dta.0510) - 6
totdiv.nf.1015 <- ncol(dta.1015) - 6


## Per-MP tallies, 1992--2001

## Columns 7 onward of a vote table
division_block <- function(tab) tab[, 7:ncol(tab)]

## tot.reb:       row sum of the division columns, missing entries ignored
## tot.absent:    number of missing (NA) division entries in the row
## tot.withparty: divisions left once rebellions and missing entries are removed
tot.reb.92       <- rowSums(division_block(dta.9297), na.rm = TRUE)
tot.absent.92    <- rowSums(is.na(division_block(dta.9297)))
tot.withparty.92 <- ncol(division_block(dta.9297)) - tot.reb.92 - tot.absent.92

tot.reb.97       <- rowSums(division_block(dta.9701), na.rm = TRUE)
tot.absent.97    <- rowSums(is.na(division_block(dta.9701)))
tot.withparty.97 <- ncol(division_block(dta.9701)) - tot.reb.97 - tot.absent.97

## First six columns plus the two tallies
tot.reb.dta.92 <- cbind(dta.9297[, 1:6], tot.reb.92, tot.withparty.92)
tot.reb.dta.97 <- cbind(dta.9701[, 1:6], tot.reb.97, tot.withparty.97)

## mostrebel = rebellions in 1992-97 plus rebellions in 1997-2001.
## The outer merge leaves NA for MPs who appear in only one of the two tables.
merge9297 <- merge(tot.reb.dta.92, tot.reb.dta.97, by = "PubWhipID", all = TRUE)
merge9297$mostrebel <- merge9297[["tot.reb.92"]] + merge9297[["tot.reb.97"]]
merge9297 <- merge9297[, c("PubWhipID", "mostrebel")]

## Put mostrebel in front of each parliament's table (PubWhipID stays first)
tot.reb.dta.92 <- merge(merge9297, tot.reb.dta.92, by = "PubWhipID")
tot.reb.dta.97 <- merge(merge9297, tot.reb.dta.97, by = "PubWhipID")


## Per-MP tallies, 2005--2015

## Columns 7 onward of a vote table
division_block <- function(tab) tab[, 7:ncol(tab)]

## tot.reb:       row sum of the division columns, missing entries ignored
## tot.absent:    number of missing (NA) division entries in the row
## tot.withparty: divisions left once rebellions and missing entries are removed
tot.reb.05       <- rowSums(division_block(dta.0510), na.rm = TRUE)
tot.absent.05    <- rowSums(is.na(division_block(dta.0510)))
tot.withparty.05 <- ncol(division_block(dta.0510)) - tot.reb.05 - tot.absent.05

tot.reb.10       <- rowSums(division_block(dta.1015), na.rm = TRUE)
tot.absent.10    <- rowSums(is.na(division_block(dta.1015)))
tot.withparty.10 <- ncol(division_block(dta.1015)) - tot.reb.10 - tot.absent.10

## First six columns plus the two tallies
tot.reb.dta.05 <- cbind(dta.0510[, 1:6], tot.reb.05, tot.withparty.05)
tot.reb.dta.10 <- cbind(dta.1015[, 1:6], tot.reb.10, tot.withparty.10)

## mostrebel = rebellions in 2005-10 plus rebellions in 2010-15.
## The outer merge leaves NA for MPs who appear in only one of the two tables.
merge0510 <- merge(tot.reb.dta.05, tot.reb.dta.10, by = "PubWhipID", all = TRUE)
merge0510$mostrebel <- merge0510[["tot.reb.05"]] + merge0510[["tot.reb.10"]]
merge0510 <- merge0510[, c("PubWhipID", "mostrebel")]

## Put mostrebel in front of each parliament's table (PubWhipID stays first)
tot.reb.dta.05 <- merge(merge0510, tot.reb.dta.05, by = "PubWhipID")
tot.reb.dta.10 <- merge(merge0510, tot.reb.dta.10, by = "PubWhipID")

##############################################
##############################################
## Summary Stats for Table 1

# Number of Divisions (Row 1)
# These are the non-free division counts (column count minus six) already
# stored in the totdiv.nf.* variables; the tables have not changed since.

totdiv.nf.9297
totdiv.nf.9701
totdiv.nf.0510
totdiv.nf.1015

####### Rebellion as a percentage of Rebellion Opportunities (no free votes, no party switchers)
# Row 2
#
# For one party in one parliament:
#   100 * (rebellions by the party's MPs) /
#         (number of the party's MPs * number of non-free divisions)
#
# Each tot.reb.* vector holds one entry per row of its own vote table
# (tot.reb.92 -> dta.9297, tot.reb.97 -> dta.9701, tot.reb.05 -> dta.0510,
# tot.reb.10 -> dta.1015), so the party mask and both denominator terms must
# come from that same table. The earlier version used dta.9297 for all four
# parliaments, which mis-indexed the later vectors and applied the 1992-97
# MP and division counts to every period.
#
# tot.reb: per-MP rebellion totals; dta: the matching vote table;
# party:   value of dta$Party to select ("Lab" or "Con").
pct_rebellion <- function(tot.reb, dta, party) {
  members <- dta$Party == party
  opportunities <- sum(members) * (ncol(dta) - 6)
  100 * (sum(tot.reb[members], na.rm = TRUE) / opportunities)
}

pct_rebellion(tot.reb.92, dta.9297, "Lab")
pct_rebellion(tot.reb.97, dta.9701, "Lab")
pct_rebellion(tot.reb.05, dta.0510, "Lab")
pct_rebellion(tot.reb.10, dta.1015, "Lab")

pct_rebellion(tot.reb.92, dta.9297, "Con")
pct_rebellion(tot.reb.97, dta.9701, "Con")
pct_rebellion(tot.reb.05, dta.0510, "Con")
pct_rebellion(tot.reb.10, dta.1015, "Con")


#### % Divisions experiencing at least one rebel
# Row 3

## Column sums of the division columns (7 onward) over one party's MPs,
## missing entries ignored: one rebel count per division.
rebels_by_division <- function(dta, party) {
  colSums(dta[dta$Party == party, 7:ncol(dta)], na.rm = TRUE)
}

## Percentage of divisions whose rebel count is strictly greater than `floor`
pct_divisions_over <- function(counts, floor) {
  100 * (sum(counts > floor) / length(counts))
}

rebperdiv <- rebels_by_division(dta.9297, "Lab")
pct_divisions_over(rebperdiv, 0)

rebperdiv <- rebels_by_division(dta.9701, "Lab")
pct_divisions_over(rebperdiv, 0)

rebperdiv <- rebels_by_division(dta.0510, "Lab")
pct_divisions_over(rebperdiv, 0)

rebperdiv <- rebels_by_division(dta.1015, "Lab")
pct_divisions_over(rebperdiv, 0)

####

rebperdiv <- rebels_by_division(dta.9297, "Con")
pct_divisions_over(rebperdiv, 0)

rebperdiv <- rebels_by_division(dta.9701, "Con")
pct_divisions_over(rebperdiv, 0)

rebperdiv <- rebels_by_division(dta.0510, "Con")
pct_divisions_over(rebperdiv, 0)

rebperdiv <- rebels_by_division(dta.1015, "Con")
pct_divisions_over(rebperdiv, 0)



#### % Divisions experiencing at least 10 rebels
# Row 4 (a count above 9 is a count of at least 10)

rebperdiv <- rebels_by_division(dta.9297, "Lab")
pct_divisions_over(rebperdiv, 9)

rebperdiv <- rebels_by_division(dta.9701, "Lab")
pct_divisions_over(rebperdiv, 9)

rebperdiv <- rebels_by_division(dta.0510, "Lab")
pct_divisions_over(rebperdiv, 9)

rebperdiv <- rebels_by_division(dta.1015, "Lab")
pct_divisions_over(rebperdiv, 9)

####

rebperdiv <- rebels_by_division(dta.9297, "Con")
pct_divisions_over(rebperdiv, 9)

rebperdiv <- rebels_by_division(dta.9701, "Con")
pct_divisions_over(rebperdiv, 9)

rebperdiv <- rebels_by_division(dta.0510, "Con")
pct_divisions_over(rebperdiv, 9)

rebperdiv <- rebels_by_division(dta.1015, "Con")
pct_divisions_over(rebperdiv, 9)




######## % of MPs who rebel at least once

## Rebellion totals of one party's MPs, labelled with the MPs' names
party_totals <- function(tot.reb, dta, party) {
  members <- dta$Party == party
  setNames(tot.reb[members], dta$MPName[members])
}

## Percentage of entries that are greater than zero
pct_positive <- function(totals) {
  100 * (sum(totals > 0) / length(totals))
}

tot.reb.lab.92 <- party_totals(tot.reb.92, dta.9297, "Lab")
pct_positive(tot.reb.lab.92)

tot.reb.lab.97 <- party_totals(tot.reb.97, dta.9701, "Lab")
pct_positive(tot.reb.lab.97)

tot.reb.lab.05 <- party_totals(tot.reb.05, dta.0510, "Lab")
pct_positive(tot.reb.lab.05)

tot.reb.lab.10 <- party_totals(tot.reb.10, dta.1015, "Lab")
pct_positive(tot.reb.lab.10)

####

tot.reb.con.92 <- party_totals(tot.reb.92, dta.9297, "Con")
pct_positive(tot.reb.con.92)

tot.reb.con.97 <- party_totals(tot.reb.97, dta.9701, "Con")
pct_positive(tot.reb.con.97)

tot.reb.con.05 <- party_totals(tot.reb.05, dta.0510, "Con")
pct_positive(tot.reb.con.05)

tot.reb.con.10 <- party_totals(tot.reb.10, dta.1015, "Con")
pct_positive(tot.reb.con.10)

##### Rebellions by the median MP, followed by the maximum, per party and parliament
## Labour
median(tot.reb.lab.92)
max(tot.reb.lab.92)

median(tot.reb.lab.97)
max(tot.reb.lab.97)

median(tot.reb.lab.05)
max(tot.reb.lab.05)

median(tot.reb.lab.10)
max(tot.reb.lab.10)
###### Conservative
median(tot.reb.con.92)
max(tot.reb.con.92)

median(tot.reb.con.97)
max(tot.reb.con.97)

median(tot.reb.con.05)
max(tot.reb.con.05)

median(tot.reb.con.10)
max(tot.reb.con.10)



## Derive the remaining variables (government, firstterm, above-median rebel,
## above-75th-percentile rebel) for 1992-97 and 1997-2001

## 0/1 vector holding 1 wherever `hit` is TRUE
as_flag <- function(hit) {
  flag <- rep(0, length(hit))
  flag[hit] <- 1
  flag
}

## TRUE where an MP's total is strictly above the cutoff of that MP's party
over_party_cutoff <- function(total, party, cut.lab, cut.con) {
  (total > cut.lab & party == "Lab") | (total > cut.con & party == "Con")
}

## firstterm marks the first of the two parliaments in the period
tot.reb.dta.92$firstterm <- 1
tot.reb.dta.97$firstterm <- 0

## gov: Con is flagged in the 1992 table, Lab in the 1997 table
tot.reb.dta.92$gov <- as_flag(tot.reb.dta.92$Party == "Con")
tot.reb.dta.97$gov <- as_flag(tot.reb.dta.97$Party == "Lab")

## 1992-97: party medians and the above-median flag
median.reb.lab.92 <- median(tot.reb.dta.92$tot.reb.92[tot.reb.dta.92$Party == "Lab"])
median.reb.con.92 <- median(tot.reb.dta.92$tot.reb.92[tot.reb.dta.92$Party == "Con"])

tot.reb.dta.92$reb.med.92 <- as_flag(over_party_cutoff(
  tot.reb.dta.92$tot.reb.92, tot.reb.dta.92$Party,
  median.reb.lab.92, median.reb.con.92
))

## 1992-97: party 75th percentiles and the above-q75 flag
q75.reb.lab.92 <- quantile(tot.reb.dta.92$tot.reb.92[tot.reb.dta.92$Party == "Lab"], 0.75)
q75.reb.con.92 <- quantile(tot.reb.dta.92$tot.reb.92[tot.reb.dta.92$Party == "Con"], 0.75)

tot.reb.dta.92$reb.q75.92 <- as_flag(over_party_cutoff(
  tot.reb.dta.92$tot.reb.92, tot.reb.dta.92$Party,
  q75.reb.lab.92, q75.reb.con.92
))

## 1997-2001: party medians and the above-median flag
median.reb.lab.97 <- median(tot.reb.dta.97$tot.reb.97[tot.reb.dta.97$Party == "Lab"])
median.reb.con.97 <- median(tot.reb.dta.97$tot.reb.97[tot.reb.dta.97$Party == "Con"])

tot.reb.dta.97$reb.med.97 <- as_flag(over_party_cutoff(
  tot.reb.dta.97$tot.reb.97, tot.reb.dta.97$Party,
  median.reb.lab.97, median.reb.con.97
))

## 1997-2001: party 75th percentiles and the above-q75 flag
q75.reb.lab.97 <- quantile(tot.reb.dta.97$tot.reb.97[tot.reb.dta.97$Party == "Lab"], 0.75)
q75.reb.con.97 <- quantile(tot.reb.dta.97$tot.reb.97[tot.reb.dta.97$Party == "Con"], 0.75)

tot.reb.dta.97$reb.q75.97 <- as_flag(over_party_cutoff(
  tot.reb.dta.97$tot.reb.97, tot.reb.dta.97$Party,
  q75.reb.lab.97, q75.reb.con.97
))


## Give both tables the same column names so they can be stacked later.
## The names are assigned by position, so the column order above matters.
names(tot.reb.dta.92) <- names(tot.reb.dta.97) <-
  c("PubWhipID","mostrebel","Name","MPName","Constituency","Party","leader","tot.reb","tot.withparty","firstterm","gov","med","q75")

## Same derived variables for 2005-10 and 2010-15

## 0/1 vector holding 1 wherever `hit` is TRUE
as_flag <- function(hit) {
  flag <- rep(0, length(hit))
  flag[hit] <- 1
  flag
}

## TRUE where an MP's total is strictly above the cutoff of that MP's party
over_party_cutoff <- function(total, party, cut.lab, cut.con) {
  (total > cut.lab & party == "Lab") | (total > cut.con & party == "Con")
}

## firstterm marks the first of the two parliaments in the period
tot.reb.dta.05$firstterm <- 1
tot.reb.dta.10$firstterm <- 0

## gov: Lab is flagged in the 2005 table, Con in the 2010 table
tot.reb.dta.05$gov <- as_flag(tot.reb.dta.05$Party == "Lab")
tot.reb.dta.10$gov <- as_flag(tot.reb.dta.10$Party == "Con")

## 2005-10: party medians and the above-median flag
median.reb.lab.05 <- median(tot.reb.dta.05$tot.reb.05[tot.reb.dta.05$Party == "Lab"])
median.reb.con.05 <- median(tot.reb.dta.05$tot.reb.05[tot.reb.dta.05$Party == "Con"])

tot.reb.dta.05$reb.med.05 <- as_flag(over_party_cutoff(
  tot.reb.dta.05$tot.reb.05, tot.reb.dta.05$Party,
  median.reb.lab.05, median.reb.con.05
))

## 2005-10: party 75th percentiles and the above-q75 flag
q75.reb.lab.05 <- quantile(tot.reb.dta.05$tot.reb.05[tot.reb.dta.05$Party == "Lab"], 0.75)
q75.reb.con.05 <- quantile(tot.reb.dta.05$tot.reb.05[tot.reb.dta.05$Party == "Con"], 0.75)

tot.reb.dta.05$reb.q75.05 <- as_flag(over_party_cutoff(
  tot.reb.dta.05$tot.reb.05, tot.reb.dta.05$Party,
  q75.reb.lab.05, q75.reb.con.05
))

## 2010-15: party medians and the above-median flag
median.reb.lab.10 <- median(tot.reb.dta.10$tot.reb.10[tot.reb.dta.10$Party == "Lab"])
median.reb.con.10 <- median(tot.reb.dta.10$tot.reb.10[tot.reb.dta.10$Party == "Con"])

tot.reb.dta.10$reb.med.10 <- as_flag(over_party_cutoff(
  tot.reb.dta.10$tot.reb.10, tot.reb.dta.10$Party,
  median.reb.lab.10, median.reb.con.10
))

## 2010-15: party 75th percentiles and the above-q75 flag
q75.reb.lab.10 <- quantile(tot.reb.dta.10$tot.reb.10[tot.reb.dta.10$Party == "Lab"], 0.75)
q75.reb.con.10 <- quantile(tot.reb.dta.10$tot.reb.10[tot.reb.dta.10$Party == "Con"], 0.75)

tot.reb.dta.10$reb.q75.10 <- as_flag(over_party_cutoff(
  tot.reb.dta.10$tot.reb.10, tot.reb.dta.10$Party,
  q75.reb.lab.10, q75.reb.con.10
))


## Give both tables the same column names so they can be stacked later.
## The names are assigned by position, so the column order above matters.
names(tot.reb.dta.05) <- names(tot.reb.dta.10) <-
  c("PubWhipID","mostrebel","Name","MPName","Constituency","Party","leader","tot.reb","tot.withparty","firstterm","gov","med","q75")
   
##############
# Attach each MP's Majority and stack the two parliaments of each period

## Read a votes.csv file and keep only the PubWhipID and Majority columns
read_majority <- function(csv) {
  read.csv(csv)[, c("PubWhipID", "Majority")]
}

## Early period: Majority comes from the first session of each parliament
maj92 <- read_majority("1992-1993/votes.csv")
maj97 <- read_majority("1997-1998/votes.csv")

tot.reb.dta.92 <- merge(tot.reb.dta.92, maj92, by = "PubWhipID")
tot.reb.dta.97 <- merge(tot.reb.dta.97, maj97, by = "PubWhipID")

## Later period: one votes.csv per parliament
maj05 <- read_majority("2005-2010/votes.csv")
maj10 <- read_majority("2010-2015/votes.csv")

tot.reb.dta.05 <- merge(tot.reb.dta.05, maj05, by = "PubWhipID")
tot.reb.dta.10 <- merge(tot.reb.dta.10, maj10, by = "PubWhipID")


## Stack the two parliaments of each period
tot.reb.dta.9297 <- as.data.frame(rbind(tot.reb.dta.92, tot.reb.dta.97))
tot.reb.dta.0510 <- as.data.frame(rbind(tot.reb.dta.05, tot.reb.dta.10))

## Rescale Majority by dividing by 10,000
tot.reb.dta.9297$Majority <- tot.reb.dta.9297$Majority / 10000
tot.reb.dta.0510$Majority <- tot.reb.dta.0510$Majority / 10000


### Add the tenure variable.

startyear <- read.csv("~/Dropbox (Personal)/Rebel Summaries/Cleaned Data and Scripts for Analysis/calcdata/startdates.csv")

## Years between an MP's start year and the reference year, divided by the
## same gap for the earliest start year in the file (so the earliest
## entrant scores 1).
scaled_tenure <- function(ref.year) {
  (ref.year - startyear$start_year) / (ref.year - min(startyear$start_year))
}

startyear$start_year97 <- scaled_tenure(1997)
startyear$start_year10 <- scaled_tenure(2010)

## One two-column lookup per period, both exposing the measure as start_year
start97 <- setNames(startyear[, c("PubWhipID", "start_year97")], c("PubWhipID", "start_year"))
start10 <- setNames(startyear[, c("PubWhipID", "start_year10")], c("PubWhipID", "start_year"))


## Left joins: every row of the rebel tables is kept
tot.reb.dta.9297 <- join(tot.reb.dta.9297, start97, by = "PubWhipID", type = "left")
tot.reb.dta.0510 <- join(tot.reb.dta.0510, start10, by = "PubWhipID", type = "left")


##################################################
##################################################
# Speech data: read, drop free votes, attach the leader flag

## Full (outer) join of two speech tables on the MP name
join_on_name <- function(left, right) {
  join(left, right, by = "MPName", type = "full")
}

## Move the last column to position 6 and keep every other column in order.
## Equivalent to the index `c(1:5, ncol(x), 7:ncol(x)-1)`, in which
## `7:ncol(x)-1` is `(7:ncol(x)) - 1`, i.e. 6:(ncol(x)-1).
leader_to_sixth <- function(tab) {
  last <- ncol(tab)
  tab[, c(1:5, last, 6:(last - 1))]
}

## Sessions of the 1992-97 parliament
dta.92.s <- read.csv("1992-1993/speeches.csv")
dta.93.s <- read.csv("1993-1994/speeches.csv")
dta.94.s <- read.csv("1994-1995/speeches.csv")
dta.95.s <- read.csv("1995-1996/speeches.csv")
dta.96.s <- read.csv("1996-1997/speeches.csv")

dta.temp   <- join_on_name(dta.92.s, dta.93.s)
dta.temp   <- join_on_name(dta.temp, dta.94.s)
dta.temp   <- join_on_name(dta.temp, dta.95.s)
dta.9297.s <- join_on_name(dta.temp, dta.96.s)

## Sessions of the 1997-2001 parliament
dta.97.s <- read.csv("1997-1998/speeches.csv")
dta.98.s <- read.csv("1998-1999/speeches.csv")
dta.99.s <- read.csv("1999-2000/speeches.csv")
dta.00.s <- read.csv("2000-2001/speeches.csv")

dta.temp   <- join_on_name(dta.97.s, dta.98.s)
dta.temp   <- join_on_name(dta.temp, dta.99.s)
dta.9701.s <- join_on_name(dta.temp, dta.00.s)

# Drop free votes
dta.9297.s <- dta.9297.s[, !(colnames(dta.9297.s) %in% fdiv.9297)]
dta.9701.s <- dta.9701.s[, !(colnames(dta.9701.s) %in% fdiv.9701)]


## Later period: one speeches.csv per parliament
dta.0510.s <- read.csv("2005-2010/speeches.csv")
dta.1015.s <- read.csv("2010-2015/speeches.csv")

# Drop free votes
dta.0510.s <- dta.0510.s[, !(colnames(dta.0510.s) %in% fdiv.0510)]
dta.1015.s <- dta.1015.s[, !(colnames(dta.1015.s) %in% fdiv.1015)]


### Inner-join the leader tables so the speech data keep the same MPs as the
### vote data, then move leader.all to position 6
dta.9297.s <- leader_to_sixth(join(dta.9297.s, bin.leader9297, by = "PubWhipID", type = "inner"))
dta.9701.s <- leader_to_sixth(join(dta.9701.s, bin.leader9701, by = "PubWhipID", type = "inner"))

dta.0510.s <- leader_to_sixth(join(dta.0510.s, bin.leader0510, by = "PubWhipID", type = "inner"))
dta.1015.s <- leader_to_sixth(join(dta.1015.s, bin.leader1015, by = "PubWhipID", type = "inner"))

##############
# Match Speech and Votes Matrices
##############

# NOTE(review): the comparisons below pair entry [i, j] of the vote table
# with entry [i, j] of the speech table. Nothing in this section re-orders
# or matches rows or columns, so it assumes both tables list the same MPs and
# the same divisions in the same order -- confirm against the input files.

## 0/1 vector holding 1 wherever `hit` is TRUE
as_flag <- function(hit) {
  flag <- rep(0, length(hit))
  flag[hit] <- 1
  flag
}

## Party / PubWhipID columns plus the three speech tallies, under fixed names
speech_table <- function(ids, reb, nonreb, total) {
  setNames(
    cbind(ids, reb, nonreb, total),
    c("Party", "PubWhipID", "Rebspeech", "Nonrebspeech", "totspeech")
  )
}

## Division columns (7 onward) of the vote (v) and speech (s) tables
dta.v.92 <- dta.9297[, 7:ncol(dta.9297)]
dta.s.92 <- dta.9297.s[, 7:ncol(dta.9297.s)]
dta.v.97 <- dta.9701[, 7:ncol(dta.9701)]
dta.s.97 <- dta.9701.s[, 7:ncol(dta.9701.s)]

dta.v.05 <- dta.0510[, 7:ncol(dta.0510)]
dta.s.05 <- dta.0510.s[, 7:ncol(dta.0510.s)]
dta.v.10 <- dta.1015[, 7:ncol(dta.1015)]
dta.s.10 <- dta.1015.s[, 7:ncol(dta.1015.s)]

## Per parliament:
##   rebmat       vote entry equals 1
##   speechmat    speech entry is at least 1
##   rebspeech    both of the above
##   nonrebspeech speech entry at least 1 while the vote entry is not 1
## followed by the row totals of rebspeech, nonrebspeech and speechmat
## (missing entries ignored).

## 1992-97
rebmat.92           <- dta.v.92 == 1
speechmat.92        <- dta.s.92 >= 1
rebspeech.92        <- rebmat.92 & speechmat.92
nonrebspeech.92     <- (!rebmat.92) & speechmat.92
tot.rebspeech.92    <- rowSums(rebspeech.92, na.rm = TRUE)
tot.nonrebspeech.92 <- rowSums(nonrebspeech.92, na.rm = TRUE)
tot.speech.92       <- rowSums(speechmat.92, na.rm = TRUE)

## 1997-2001
rebmat.97           <- dta.v.97 == 1
speechmat.97        <- dta.s.97 >= 1
rebspeech.97        <- rebmat.97 & speechmat.97
nonrebspeech.97     <- (!rebmat.97) & speechmat.97
tot.rebspeech.97    <- rowSums(rebspeech.97, na.rm = TRUE)
tot.nonrebspeech.97 <- rowSums(nonrebspeech.97, na.rm = TRUE)
tot.speech.97       <- rowSums(speechmat.97, na.rm = TRUE)

## 2005-10
rebmat.05           <- dta.v.05 == 1
speechmat.05        <- dta.s.05 >= 1
rebspeech.05        <- rebmat.05 & speechmat.05
nonrebspeech.05     <- (!rebmat.05) & speechmat.05
tot.rebspeech.05    <- rowSums(rebspeech.05, na.rm = TRUE)
tot.nonrebspeech.05 <- rowSums(nonrebspeech.05, na.rm = TRUE)
tot.speech.05       <- rowSums(speechmat.05, na.rm = TRUE)

## 2010-15
rebmat.10           <- dta.v.10 == 1
speechmat.10        <- dta.s.10 >= 1
rebspeech.10        <- rebmat.10 & speechmat.10
nonrebspeech.10     <- (!rebmat.10) & speechmat.10
tot.rebspeech.10    <- rowSums(rebspeech.10, na.rm = TRUE)
tot.nonrebspeech.10 <- rowSums(nonrebspeech.10, na.rm = TRUE)
tot.speech.10       <- rowSums(speechmat.10, na.rm = TRUE)

## Party and PubWhipID are taken from the vote tables
speechdta92 <- speech_table(dta.9297[, c("Party", "PubWhipID")], tot.rebspeech.92, tot.nonrebspeech.92, tot.speech.92)
speechdta97 <- speech_table(dta.9701[, c("Party", "PubWhipID")], tot.rebspeech.97, tot.nonrebspeech.97, tot.speech.97)
speechdta05 <- speech_table(dta.0510[, c("Party", "PubWhipID")], tot.rebspeech.05, tot.nonrebspeech.05, tot.speech.05)
speechdta10 <- speech_table(dta.1015[, c("Party", "PubWhipID")], tot.rebspeech.10, tot.nonrebspeech.10, tot.speech.10)

## gov: Con flagged in 1992 and 2010, Lab flagged in 1997 and 2005
speechdta92$gov <- as_flag(speechdta92$Party == "Con")
speechdta97$gov <- as_flag(speechdta97$Party == "Lab")

speechdta05$gov <- as_flag(speechdta05$Party == "Lab")
speechdta10$gov <- as_flag(speechdta10$Party == "Con")

## Stack the two parliaments of each period
speechdta9297 <- rbind(speechdta92, speechdta97)
speechdta0510 <- rbind(speechdta05, speechdta10)

##################################################
# Combine the speech and rebel tables
# Rows are matched on PubWhipID, gov and Party. Within a period the gov flag
# of a given party differs between the two stacked parliaments, so the key
# keeps an MP's two rows apart.

all.dta.9297 <- merge(tot.reb.dta.9297, speechdta9297, by = c("PubWhipID", "gov", "Party"))
all.dta.0510 <- merge(tot.reb.dta.0510, speechdta0510, by = c("PubWhipID", "gov", "Party"))


### Write the two merged tables to the CalcData folder (path2)
save(all.dta.9297, file = file.path(path2, "rebeldta9297.rda"))
save(all.dta.0510, file = file.path(path2, "rebeldta0510.rda"))

